Many of the figures and analysis methodology were based off of this tutorial from NYU. https://learn.gencore.bio.nyu.edu/rna-seq-analysis/gene-set-enrichment-analysis/
## Library Preparation
library(clusterProfiler)
library(DESeq2)
library(dplyr)
library(tidyverse)
library(presto)
library(ashr)
library(GOSemSim)
library(org.Ce.eg.db)
library(bookdown)
library(knitr)
library(DT)
library(htmltools)
# Call functions
source("~/Documents/research_labs/mdibl_fall_2025/updike_lotr1_go_analysis/scripts/translateBioIDs.R")
source("~/Documents/research_labs/mdibl_fall_2025/updike_lotr1_go_analysis/scripts/runGO.R")
source("~/Documents/research_labs/mdibl_fall_2025/updike_lotr1_go_analysis/scripts/strainFractionGO.R")
source("~/Documents/research_labs/mdibl_fall_2025/updike_lotr1_go_analysis/scripts/emptyTableMessage.R")
# # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # #
# # Calls translateBioIDs.R and geneOntologyAnalysis.R # #
# # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # #
strainFractionGO <- function(result.obj, strain, fraction, l2fc_filter, padj_filter){
# Create comment structure for bookdown render
cat(paste0("# ", strain," ", fraction, " {.tabset .tabset}\n\n"))
# Show exact call which invoked function
call_txt <- paste(deparse(match.call()), collapse = "\n")
cat("**Inputs for function call:**\n\n")
cat("```r\n")
cat(call_txt, "\n\n")
cat("```\n\n")
# Translate Wormbase IDs to ENTREZID
result.obj.list <- translateBioIDs(
DESeqResults.obj = result.obj,
bioID = "WORMBASE",
l2fc_filter = l2fc_filter,
padj_filter = padj_filter
)
# Run gruopGO(), enrichGO(), gseGO(), and goplot()
go.result.list <- runGO(
geneNames.up = result.obj.list$upregulated_genes[,1],
geneNames.down = result.obj.list$downregulated_genes[,1],
geneList = result.obj.list$vectorized_all_DE,
universe = result.obj.list$all_genes[,1],
OrgDb = org.Ce.eg.db)
# # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # #
# # Print text and tables to console for bookdown rendering # #
# # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # #
# # # GSEA # # #
cat(paste0("## GSEA \n\n"))
# Print GSEA table
t0 <- list(datatable(data.frame(go.result.list$go_gse[[1]]),
extensions = 'Buttons',
caption = " GSE Of All DEGs.",
options = list(
dom = "Bfrtip",
buttons = c('csvHtml5'),
pageLength = 5)))
print(htmltools::tagList(t0[[1]]))
cat("\n\n")
# Tabset for GSEA plots
cat(sprintf("#### %s %s GSEA Plots {.tabset .tabset-pills}\n\n", strain, fraction))
# Print GSEA plot for each set identified
for (i in 1:nrow(data.frame(go.result.list$go_gse[[1]]))) {
cat(sprintf("##### %s \n\n", go.result.list$go_gse[[1]]$Description[i]))
print(gseaplot(go.result.list$go_gse[[1]], by = "all", title = go.result.list$go_gse[[1]]$Description[i], geneSetID = go.result.list$go_gse[[1]]$ID[i]))
cat("\n\n")
}
# # # Upregulated GO classification and GO over-representation # # #
cat(paste0("\n\n## Upregulated\n\n"))
if(nrow(data.frame(go.result.list$go_class_upreg[[1]])) != 0){
t1 <- list(datatable(data.frame(go.result.list$go_class_upreg[[1]]),
extensions = 'Buttons',
caption = " GO Classification of Upregulated Genes.",
options = list(
dom = "Bfrtip",
buttons = c('csvHtml5'),
pageLength = 5)))
print(htmltools::tagList(t1[[1]]))
cat("\n\n")
write.csv(go.result.list$go_class_upreg[[1]], file = sprintf("./results/%s_%s_GO.classification.upreg.csv",gsub(" ", "_", tolower(strain)),tolower(fraction)))
}else{
emptyTableMessage(' GO Classification of Upregulated Genes')
}
if(nrow(data.frame(go.result.list$go_overrep_upreg[[1]])) != 0){
t2 <- list(datatable(data.frame(go.result.list$go_overrep_upreg[[1]]),
extensions = "Buttons",
caption = " GO Over-representation Analysis of Upregulated Genes.",
options = list(
dom = "Bfrtip",
buttons = c('csvHtml5'),
pageLength = 5)))
print(htmltools::tagList(t2[[1]]))
cat("\n\n")
write.csv(go.result.list$go_overrep_upreg[[1]], file = sprintf("./results/%s_%s_GO.overrepresented.upreg.csv",gsub(" ", "_", tolower(strain)),tolower(fraction)))
}else{
emptyTableMessage(' GO Over-representation Analysis of Upregulated Genes')
}
# # # Downregulated GO classification and GO over-representation # # #
cat(paste0("\n\n## Downregulated\n\n"))
if(nrow(data.frame(go.result.list$go_class_downreg[[1]])) != 0){
t3 <- list(datatable(data.frame(go.result.list$go_class_downreg[[1]]),
extensions = "Buttons",
caption = " GO Classification of Downregulated Genes.",
options = list(
dom = "Bfrtip",
buttons = c('csvHtml5'),
pageLength = 5)))
print(htmltools::tagList(t3[[1]]))
cat("\n\n")
write.csv(go.result.list$go_class_downreg[[1]], file = sprintf("./results/%s_%s_GO.classification.downreg.csv",gsub(" ", "_", tolower(strain)),tolower(fraction)))
}else{
emptyTableMessage(' GO Classification of Downregulated Genes')
}
if(nrow(data.frame(go.result.list$go_overrep_downreg[[1]])) != 0){
t4 <- list(datatable(data.frame(go.result.list$go_overrep_downreg[[1]]),
extensions = "Buttons",
caption = " GO Over-representation Analysis of Downregulated Genes.",
options = list(
dom = "Bfrtip",
buttons = c('csvHtml5'),
pageLength = 5)))
print(htmltools::tagList(t4[[1]]))
cat("\n\n")
write.csv(go.result.list$go_overrep_downreg[[1]], file = sprintf("./results/%s_%s_GO.overrepresented.downreg.csv",gsub(" ", "_", tolower(strain)),tolower(fraction)))
}else{
emptyTableMessage(' GO Over-representation Analysis of Downregulated Genes')
}
rm(t0, t1, t2, t3, t4)
}
# # # # # # # # # # # # # # # # # # # # # # # # #
# # Translating Biological IDS # #
# # # # # # # # # # # # # # # # # # # # # # # # #
# # # From WormBase to ENTREZID
translateBioIDs <- function (DESeqResults.obj, bioID, return.success = TRUE, l2fc_filter, padj_filter) {
if (bioID != "WORMBASE"){
stop("⛔️ ERROR: Only WORMBASE biological ID supported as input.")
}
# Convert DESeq2 results object to DataFrame, and edit structure
gene.df <- DESeqResults.obj %>%
data.frame() %>%
rownames_to_column("WORMBASE") %>%
dplyr::select(WORMBASE, log2FoldChange, pvalue, padj)
# Biological ID TranslatoR
gene_ids <- bitr(gene.df[,'WORMBASE'],
fromType = "WORMBASE",
toType = "ENTREZID",
OrgDb = org.Ce.eg.db)
cat("\n")
# Record transfer success
if (return.success == TRUE){
t1 <- table(gene.df$WORMBASE %in% gene_ids$WORMBASE)
cat("**Transfering WORMBASE gene IDs to ENTREZID:**\n\n")
cat("```r\n")
cat(paste0(t1[["FALSE"]]," gene IDs were not transfered from WORMBASE to ENTREZID \n"))
cat(paste0(t1[["TRUE"]]," gene IDs were successfully transfered from WORMBASE to ENTREZID \n\n"))
cat("```\n\n")
}
# Clear duplicate genes following ID transfer
gene_ids <- gene_ids %>%
distinct(WORMBASE, .keep_all = T) %>%
column_to_rownames("WORMBASE")
# Add ENTREZID to L2FC carrying DataFrame
gene.df <- gene.df %>%
mutate(ENTREZID = gene_ids[WORMBASE, 1]) %>%
dplyr::select(ENTREZID, log2FoldChange, pvalue, padj) %>%
subset(!is.na(ENTREZID)) # Remove NA ENTREZ ID values
rownames(gene.df) <- NULL
# # Subset L2FC Data # #
# Subset for highly varirable genes
variable.genes <- gene.df %>%
subset(abs(log2FoldChange) > l2fc_filter & padj < padj_filter & !isNA(padj))
rownames(variable.genes) <- NULL
# report number of highly variable genes used for analysis
cat(sprintf("**Number of highly variable genes identified (with | L2FC | > %s and padj < %s) is:**\n\n", toString(l2fc_filter) , toString(padj_filter)))
cat("```r\n")
cat(length(variable.genes[[1]]))
cat("\n\n```\n\n")
# Subset for highly upregulated genes
upregulated.genes <- gene.df %>%
subset(log2FoldChange > l2fc_filter & padj < padj_filter & !isNA(padj))
rownames(upregulated.genes) <- NULL
# report number of highly variable genes used for analysis
cat(sprintf("**Number of highly upregulated genes identified (with L2FC > %s and padj < %s) is:**\n\n", toString(l2fc_filter) , toString(padj_filter)))
cat("```r\n")
cat(length(upregulated.genes[[1]]))
cat("\n\n```\n\n")
# Subset for highly downregulated genes
downregulated.genes <- gene.df %>%
subset(log2FoldChange < -l2fc_filter & padj < padj_filter & !isNA(padj))
rownames(downregulated.genes) <- NULL
# report number of highly variable genes used for analysis
cat(sprintf("**Number of highly downregulated genes identified (with L2FC < -%s and padj < %s) is:**\n\n", toString(l2fc_filter) , toString(padj_filter)))
cat("```r\n")
cat(length(downregulated.genes[[1]]))
cat("\n\n```\n\n")
# Subset for all genes (universe DataFrame)
all.genes <- gene.df %>%
subset(!isNA(padj)) %>%
arrange(desc(log2FoldChange))
# Vectorized differentially expressed genes
vectorized <- all.genes[,2]
names(vectorized) <- all.genes[,1]
# Save all data to list
gene.list <- list(variable.genes, upregulated.genes, downregulated.genes, all.genes, vectorized)
names(gene.list) <- c("variable_genes", "upregulated_genes", "downregulated_genes", "all_genes", "vectorized_all_DE")
return(gene.list)
}
# # # # # # # # # # # # # # # # # # # # # # # # # # # # #
# # Gene Ontology Enrichment Analysis # #
# # # # # # # # # # # # # # # # # # # # # # # # # # # # #
runGO <- function(geneNames.up, geneNames.down, geneList, universe, OrgDb){
# # # # # # # # # # # # # # # # # # # # # # #
# # # Analysis of Upregulated Genes # # #
# # # # # # # # # # # # # # # # # # # # # # #
# # GO Classification # #
go_class_upreg <- groupGO(gene = geneNames.up,
OrgDb = OrgDb,
ont = "CC",
level = 3,
readable = TRUE) %>%
arrange(desc(Count))
# # GO Over-Representation Analysis # #
go_overrep_upreg <- enrichGO(gene = geneNames.up,
universe = universe,
OrgDb = OrgDb,
ont = "CC",
pAdjustMethod = "BH",
pvalueCutoff = 0.01,
qvalueCutoff = 0.05,
readable = TRUE) %>%
arrange(desc(Count))
# # # # # # # # # # # # # # # # # # # # # # # #
# # # Analysis of Downregulated Genes # # #
# # # # # # # # # # # # # # # # # # # # # # # #
# # GO Classification # #
go_class_downreg <- groupGO(gene = geneNames.down,
OrgDb = OrgDb,
ont = "CC",
level = 3,
readable = TRUE) %>%
arrange(desc(Count))
# # GO Over-Representation Analysis # #
go_overrep_downreg <- enrichGO(gene = geneNames.down,
universe = universe,
OrgDb = OrgDb,
ont = "CC",
pAdjustMethod = "BH",
pvalueCutoff = 0.01,
qvalueCutoff = 0.05,
readable = TRUE) %>%
arrange(desc(Count))
# # # # # # # # # # # # # # # # # # # # # # # #
# # GO Gene Set Enrichment Analysis # #
# # # # # # # # # # # # # # # # # # # # # # # #
go_gene_set_enrichment <- gseGO(geneList = geneList,
OrgDb = OrgDb,
ont = "CC",
minGSSize = 100,
maxGSSize = 500,
pvalueCutoff = 0.05,
verbose = FALSE)
go_results <- list(
list(go_class_upreg),
list(go_overrep_upreg),
list(go_class_downreg),
list(go_overrep_downreg),
list(go_gene_set_enrichment)
)
names(go_results) <- c(
"go_class_upreg",
"go_overrep_upreg",
"go_class_downreg",
"go_overrep_downreg",
"go_gse"
)
return(go_results)
}
# Lysate
lys <- readRDS("~/Documents/research_labs/mdibl_fall_2025/updike_lotr1_go_analysis/intermediate_data/deseq2_dds_objects/lotr1_isolatedStrain_Lysate_BatchCor_dds.rds")
# Polysome
ply <- readRDS("~/Documents/research_labs/mdibl_fall_2025/updike_lotr1_go_analysis/intermediate_data/deseq2_dds_objects/lotr1_isolatedStrain_Polysome_BatchCor_dds.rds")
# Full KO vs WT
full_vs_wt.lys <- results(lys, name = "strain_DUP207_vs_DUP167")
full_vs_wt.ply <- results(ply, name = "strain_DUP207_vs_DUP167")
# Tudor KO vs WT
tudor_vs_wt.lys <- results(lys, name = "strain_DUP185_vs_DUP167")
tudor_vs_wt.ply <- results(ply, name = "strain_DUP185_vs_DUP167")
# LOTUS KO vs WT
lotus_vs_wt.lys <- results(lys, name = "strain_DUP217_vs_DUP167")
lotus_vs_wt.ply <- results(ply, name = "strain_DUP217_vs_DUP167")
Inputs for function call:
strainFractionGO(result.obj = full_vs_wt.lys, strain = "Full LOTR-1 KO vs WT",
fraction = "Lysate", l2fc_filter = 0.8, padj_filter = 0.05)
Transfering WORMBASE gene IDs to ENTREZID:
542 gene IDs were not transfered from WORMBASE to ENTREZID
15111 gene IDs were successfully transfered from WORMBASE to ENTREZID
Number of highly variable genes identified (with | L2FC | > 0.8 and padj < 0.05) is:
278
Number of highly upregulated genes identified (with L2FC > 0.8 and padj < 0.05) is:
237
Number of highly downregulated genes identified (with L2FC < -0.8 and padj < 0.05) is:
41
Table GO Over-representation Analysis of Upregulated Genes has no rows.
Table GO Over-representation Analysis of Downregulated Genes has no rows.
Inputs for function call:
strainFractionGO(result.obj = full_vs_wt.ply, strain = "Full LOTR-1 KO vs WT",
fraction = "Polysome", l2fc_filter = 0.8, padj_filter = 0.05)
Transfering WORMBASE gene IDs to ENTREZID:
374 gene IDs were not transfered from WORMBASE to ENTREZID
14732 gene IDs were successfully transfered from WORMBASE to ENTREZID
Number of highly variable genes identified (with | L2FC | > 0.8 and padj < 0.05) is:
129
Number of highly upregulated genes identified (with L2FC > 0.8 and padj < 0.05) is:
108
Number of highly downregulated genes identified (with L2FC < -0.8 and padj < 0.05) is:
21
Table GO Over-representation Analysis of Upregulated Genes has no rows.
Inputs for function call:
strainFractionGO(result.obj = tudor_vs_wt.lys, strain = "Tudor LOTR-1 KO vs WT",
fraction = "Lysate", l2fc_filter = 0.8, padj_filter = 0.05)
Transfering WORMBASE gene IDs to ENTREZID:
542 gene IDs were not transfered from WORMBASE to ENTREZID
15111 gene IDs were successfully transfered from WORMBASE to ENTREZID
Number of highly variable genes identified (with | L2FC | > 0.8 and padj < 0.05) is:
186
Number of highly upregulated genes identified (with L2FC > 0.8 and padj < 0.05) is:
148
Number of highly downregulated genes identified (with L2FC < -0.8 and padj < 0.05) is:
38
Table GO Over-representation Analysis of Upregulated Genes has no rows.
Table GO Over-representation Analysis of Downregulated Genes has no rows.
Inputs for function call:
strainFractionGO(result.obj = tudor_vs_wt.ply, strain = "Tudor LOTR-1 KO vs WT",
fraction = "Polysome", l2fc_filter = 0.8, padj_filter = 0.05)
Transfering WORMBASE gene IDs to ENTREZID:
374 gene IDs were not transfered from WORMBASE to ENTREZID
14732 gene IDs were successfully transfered from WORMBASE to ENTREZID
Number of highly variable genes identified (with | L2FC | > 0.8 and padj < 0.05) is:
78
Number of highly upregulated genes identified (with L2FC > 0.8 and padj < 0.05) is:
60
Number of highly downregulated genes identified (with L2FC < -0.8 and padj < 0.05) is:
18
Table GO Over-representation Analysis of Upregulated Genes has no rows.
Inputs for function call:
strainFractionGO(result.obj = lotus_vs_wt.lys, strain = "LOTUS LOTR-1 KO vs WT",
fraction = "Lysate", l2fc_filter = 0.8, padj_filter = 0.05)
Transfering WORMBASE gene IDs to ENTREZID:
542 gene IDs were not transfered from WORMBASE to ENTREZID
15111 gene IDs were successfully transfered from WORMBASE to ENTREZID
Number of highly variable genes identified (with | L2FC | > 0.8 and padj < 0.05) is:
430
Number of highly upregulated genes identified (with L2FC > 0.8 and padj < 0.05) is:
237
Number of highly downregulated genes identified (with L2FC < -0.8 and padj < 0.05) is:
193
Inputs for function call:
strainFractionGO(result.obj = lotus_vs_wt.ply, strain = "LOTUS LOTR-1 KO vs WT",
fraction = "Polysome", l2fc_filter = 0.8, padj_filter = 0.05)
Transfering WORMBASE gene IDs to ENTREZID:
374 gene IDs were not transfered from WORMBASE to ENTREZID
14732 gene IDs were successfully transfered from WORMBASE to ENTREZID
Number of highly variable genes identified (with | L2FC | > 0.8 and padj < 0.05) is:
169
Number of highly upregulated genes identified (with L2FC > 0.8 and padj < 0.05) is:
63
Number of highly downregulated genes identified (with L2FC < -0.8 and padj < 0.05) is:
106
Table GO Over-representation Analysis of Upregulated Genes has no rows.